library(tidyverse)  # ggplot(), %>%, mutate(), and friends 
library(broom)
library(MatchIt)  # Match things
library(Rcpp)
library(MASS)
library(modelsummary)
library(IRdisplay)
library(haven)
library(dplyr)
library(jtools)
library(car)
library(margins)
library(marginaleffects)
library(cobalt)
library(ggplot2)

# set directory


# IMPORT MAP
map <- read_csv('C:/./dataset76_20.csv')

regions <- read_csv('C:/./states_region.csv')
regions <- dplyr::rename(regions, State=state_x)


# replace string for being identical acrsso map and nibrs regions
map$State[map$State == 'Rhodes Island'] <- 'Rhode Island'
map$State[map$State == 'District of Columbia'] <- 'District Of Columbia'
# adding missing states
regions <- regions %>% add_row(...1= 888, State = "Alaska", country_region = "west", country_division="Pacific")
regions <- regions %>% add_row(...1= 999, State = "California", country_region = "west", country_division="Pacific")
regions <- regions %>% add_row(...1= 45878, State = "Florida", country_region = "south", country_division="South Atlantic")
regions <- regions %>% add_row(...1= 97666, State = "New Jersey", country_region = "north east", country_division="Middle Atlantic")


# add region and division to map dataset
map<- merge(map, regions, by="State")
library(janitor)

#clean labels: can be done by simply
map <- clean_names(map)

# change to character exposure variable
map$vic_race_black <- as.character(map$vic_race_black)

# final cleaning
map <- subset(map, map$agentype != 4)
map <- subset(map, map$state != "PAPSP8")



########### Models


############## MAP (agency info + states)
m.out3 <- matchit(vic_race_black ~ vic_count + off_count + vic_sex+
                    age_cat+
                    decade+
                    agentype+
                    monthly_agency_overlap+
                    state,                        
                  data=map,
                  method = "exact",
                  estimand="ATT")

bal.tab(m.out3) # check balance

# get dataset
matched_dataset3 <- match.data(m.out3)


######### modeling



# adjusted
model_matched3 <- glm(solved_yes ~ vic_race_black + 
                        vic_count + off_count + vic_sex+
                        age_cat + 
                        decade+
                        weapon+ # added only in adjustment
                        circumstance+ # added only in adjustment
                        agentype+	 
                        factor(monthly_agency_overlap)+
                        state, 
                      family=binomial, data = matched_dataset3, weights=weights)

#tidy(model_matched3)

#summ(model_matched3, exp = TRUE, robust=TRUE, digits=3)
ame_3 <-comparisons(model_matched3, variables = "vic_race_black", newdata = subset(matched_dataset3, vic_race_black == 1)) |>
  summary()
ame_3$dataset <- 'MAP (1976-2020)'
ame_3$model <- 'Adjusted/Matched'


# non-matched
model_unmatched3 <- glm(solved_yes ~ vic_race_black + 
                          vic_count + off_count + vic_sex+
                          age_cat + 
                          decade+
                          weapon+
                          circumstance+
                          agentype+	 
                          factor(monthly_agency_overlap)+
                          state, 
                        family=binomial, data = map)

ame_3_unmatched<-comparisons(model_unmatched3, variables = "vic_race_black", newdata = subset(map, vic_race_black == 1)) |>
  summary()
ame_3_unmatched$dataset <- 'MAP (1976-2020)'
ame_3_unmatched$model <- 'Adjusted/Unmatched'

summ(model_unmatched3, exp = TRUE, robust=TRUE, digits=3)

# plot (preprocessing)
ame3_list <- do.call("rbind", list(ame_3, ame_3_unmatched))
ame3_list <- ame3_list[order(ame3_list$dataset),]
ame3_list$dataset<- fct_inorder(ame3_list$dataset)
ame3_list$model = factor(ame3_list$model, levels=c("Adjusted/Matched", "Adjusted/Unmatched"))
# plot
dodge <- position_dodge(width=0.5)
ame3_plot <- ggplot(ame3_list, x=factor(ame3_list$dataset, levels=unique(ame3_list$dataset)), y=estimate, color=model, aes(ymin=-0.06,ymax=0.06))+
  geom_point(aes(x=factor(ame3_list$dataset, levels=unique(ame3_list$dataset)), y=estimate,shape=model, color=model), size=2.5, position=dodge)+
  scale_y_continuous(breaks =c("-.06"=-0.06, "-.04"=-0.04,"-.02"=-0.02, "0"=0,
                               0, ".02"=0.02, ".04"=0.04, ".06"=0.06))+
  geom_errorbar(aes(x=dataset, ymin=conf.low, ymax=conf.high, group=model, color=model), width=0.1,position=dodge, name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched"))+
  scale_color_manual(values=c("Adjusted/Matched"="red","Adjusted/Unmatched"="blue"), name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched"))+
  scale_shape_manual(values=c("Adjusted/Matched"=19,"Adjusted/Unmatched"= 15), name="Model", labels = c("Adjusted/Matched", "Adjusted/Unmatched" ))+
  labs(x= "Dataset", y = "Average Marginal Effect (95% C.I.)")+
  theme_bw()+
  geom_hline(aes(yintercept=0), colour="#990000", linetype="dashed")+
  coord_flip()+theme(axis.text.y = element_text(size=12)) + 
  theme(axis.text.x = element_text(size=12), axis.title=element_text(size=16))+
  #theme(text = element_text(family = "Times New Roman"))+
  theme(legend.title=element_text(size=14), legend.key.size=unit(0.5, 'cm'), legend.text = element_text(size=14))+theme(legend.position="bottom")+
  guides(colour=guide_legend(nrow=1,byrow=TRUE))
